Code and summary plots

Replication code

# vb 9/20/2022, not sure what this was doing, maybe an old version conflict?
#path <- "/Library/Frameworks/R.framework/Versions/3.5/Resources/library"

# Attach order matters: both data.table and reshape2 export melt(), and the
# later attach wins the masking, so keep this order as-is.
library(tidyverse)
library(data.table)
## Warning: package 'data.table' was built under R version 4.2.3
library(reshape2)
library(glmnet)
library(ggplot2)
library(ggridges)
library(DT)
## Warning: package 'DT' was built under R version 4.2.3
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
# find the most recent data
# NOTE: assigning a string to the name cv.glmnet shadows glmnet::cv.glmnet()
# in this session; it is reassigned to the fitted model object below.
cv.glmnet <- "main_full_cv_glmnet_object_4Sep2025.rds"
results <- "final_forecasts_data_4Sep2025.csv" 

# rename to fix some variables

# print when these files were created (sanity check that we picked up the
# most recent export)
file.info(cv.glmnet)$ctime
## [1] "2025-09-06 15:10:28 EDT"
file.info(results)$ctime
## [1] "2025-09-06 15:10:38 EDT"
# load the files; after these two lines, cv.glmnet holds the fitted model
# object and results holds the forecast table (no longer the file paths)
cv.glmnet <- readRDS(cv.glmnet)
results <- fread(results) %>% 
  mutate(
    risk.2yr = risk.2025and2026.from2024, #changed to update years - NB
    risk.1yr =risk.2025only.from2024  ) %>%  #changed to update years - NB
  dplyr::rename(country = ewp_name) %>% 
  #dplyr::select(-partyban.new.2) %>% # the last of the set, to be consistent with country
  #rename(imr.sqrt = imr.fwd.fill.sqrt) %>% 
  as.data.frame()

# Predictor names come from the fitted coefficient matrix; the first row is
# "(Intercept)", which is not a predictor, so drop it.
predictornames <- rownames(coef(cv.glmnet))[-1]

# Set up the aliases expected by the downstream (John Ray) shift code.
Xtest <- results
Xtest$risk2017.2yr <- Xtest$risk.2yr
Xtest$country_name <- Xtest$country
model.out <- cv.glmnet
# data.table has stricter indexing rules, so force a plain data.frame
Xtest <- as.data.frame(Xtest)


# data validation: a strict stopifnot(all(published == recomputed)) check
# failed on tiny rounding differences, so instead we print a side-by-side
# comparison here and apply a tolerance-based check further below - NB
actual_values <- Xtest$risk2017.2yr
predicted_values <- signif(
  as.vector(predict(model.out,
                    newx = as.matrix(Xtest[, predictornames]),
                    s = model.out$lambda.min,
                    type = "response")),
  4)

# Side-by-side view of published vs freshly computed risk, for debugging
comparison <- data.frame(Actual = actual_values, Predicted = predicted_values)
print(comparison)
##       Actual Predicted
## 1   0.104100  0.104100
## 2   0.081210  0.081140
## 3   0.080960  0.080940
## 4   0.075240  0.075180
## 5   0.074880  0.074860
## 6   0.072060  0.072000
## 7   0.061740  0.061760
## 8   0.059840  0.059800
## 9   0.055170  0.055120
## 10  0.055060  0.055010
## 11  0.052310  0.052300
## 12  0.048410  0.048400
## 13  0.048270  0.048270
## 14  0.043850  0.043810
## 15  0.042400  0.042400
## 16  0.040030  0.040000
## 17  0.039040  0.039060
## 18  0.037720  0.037700
## 19  0.035840  0.035840
## 20  0.035790  0.035800
## 21  0.033450  0.033450
## 22  0.032760  0.032770
## 23  0.031810  0.031800
## 24  0.031540  0.031550
## 25  0.030260  0.030260
## 26  0.030060  0.030070
## 27  0.027850  0.027840
## 28  0.027190  0.027180
## 29  0.026600  0.026580
## 30  0.026390  0.026390
## 31  0.026370  0.026360
## 32  0.025520  0.025530
## 33  0.025490  0.025470
## 34  0.024870  0.024880
## 35  0.024730  0.024740
## 36  0.024600  0.024600
## 37  0.024110  0.024100
## 38  0.023270  0.023260
## 39  0.023090  0.023070
## 40  0.022880  0.022860
## 41  0.022270  0.022260
## 42  0.022120  0.022110
## 43  0.021230  0.021230
## 44  0.020510  0.020510
## 45  0.020410  0.020400
## 46  0.019580  0.019570
## 47  0.019070  0.019070
## 48  0.018700  0.018710
## 49  0.018540  0.018530
## 50  0.017140  0.017140
## 51  0.016490  0.016500
## 52  0.016290  0.016290
## 53  0.015780  0.015780
## 54  0.014720  0.014720
## 55  0.014490  0.014480
## 56  0.014320  0.014310
## 57  0.014240  0.014240
## 58  0.013550  0.013540
## 59  0.012880  0.012880
## 60  0.012880  0.012870
## 61  0.012430  0.012430
## 62  0.012340  0.012330
## 63  0.012290  0.012290
## 64  0.012050  0.012040
## 65  0.011480  0.011480
## 66  0.011310  0.011300
## 67  0.010980  0.010980
## 68  0.010770  0.010760
## 69  0.010520  0.010520
## 70  0.010160  0.010150
## 71  0.010080  0.010080
## 72  0.010050  0.010060
## 73  0.010050  0.010040
## 74  0.010010  0.010000
## 75  0.009719  0.009722
## 76  0.009706  0.009704
## 77  0.009697  0.009693
## 78  0.009353  0.009345
## 79  0.009023  0.009021
## 80  0.008498  0.008491
## 81  0.008359  0.008357
## 82  0.008321  0.008323
## 83  0.008203  0.008201
## 84  0.007812  0.007805
## 85  0.007548  0.007542
## 86  0.007356  0.007358
## 87  0.007086  0.007087
## 88  0.006975  0.006977
## 89  0.006793  0.006795
## 90  0.006696  0.006698
## 91  0.006446  0.006441
## 92  0.006389  0.006385
## 93  0.006180  0.006183
## 94  0.006168  0.006163
## 95  0.005987  0.005986
## 96  0.005917  0.005917
## 97  0.005911  0.005907
## 98  0.005808  0.005808
## 99  0.005766  0.005766
## 100 0.005594  0.005590
## 101 0.005452  0.005453
## 102 0.005451  0.005446
## 103 0.005397  0.005397
## 104 0.005209  0.005205
## 105 0.005147  0.005142
## 106 0.004898  0.004898
## 107 0.004841  0.004837
## 108 0.004837  0.004832
## 109 0.004782  0.004779
## 110 0.004781  0.004782
## 111 0.004766  0.004762
## 112 0.004716  0.004713
## 113 0.004699  0.004699
## 114 0.004650  0.004651
## 115 0.004588  0.004589
## 116 0.004357  0.004356
## 117 0.004319  0.004318
## 118 0.004296  0.004296
## 119 0.004267  0.004268
## 120 0.004146  0.004144
## 121 0.004130  0.004129
## 122 0.004126  0.004124
## 123 0.004067  0.004068
## 124 0.003913  0.003911
## 125 0.003872  0.003869
## 126 0.003850  0.003850
## 127 0.003805  0.003805
## 128 0.003723  0.003724
## 129 0.003614  0.003614
## 130 0.003581  0.003580
## 131 0.003571  0.003568
## 132 0.003276  0.003275
## 133 0.003234  0.003234
## 134 0.003224  0.003223
## 135 0.003142  0.003140
## 136 0.003089  0.003087
## 137 0.003075  0.003075
## 138 0.003068  0.003069
## 139 0.003031  0.003030
## 140 0.002959  0.002959
## 141 0.002940  0.002938
## 142 0.002931  0.002930
## 143 0.002915  0.002914
## 144 0.002881  0.002881
## 145 0.002855  0.002853
## 146 0.002736  0.002736
## 147 0.002697  0.002696
## 148 0.002678  0.002676
## 149 0.002645  0.002643
## 150 0.002640  0.002637
## 151 0.002617  0.002616
## 152 0.002581  0.002580
## 153 0.002568  0.002567
## 154 0.002511  0.002509
## 155 0.002482  0.002482
## 156 0.002409  0.002408
## 157 0.002334  0.002333
## 158 0.002237  0.002237
## 159 0.002087  0.002087
## 160 0.002042  0.002041
## 161 0.002008  0.002008
## 162 0.001834  0.001833
## 163 0.001832  0.001831
## 164 0.001819  0.001819
## 165 0.001743  0.001743
## 166 0.001741  0.001741
## 167 0.001683  0.001681
## 168 0.001394  0.001395
# Exact equality fails on rounding noise, so require agreement within a
# small absolute tolerance instead of enforcing an exact match.
tolerance <- 1e-3
all_equal <- all(abs(actual_values - predicted_values) < tolerance)
stopifnot(all_equal)

# Pre-allocate the counterfactual-prediction matrix: one row per country,
# one column per predictor (the "newx" fed to predict is built per IV below).
new_preds <- matrix(nrow = nrow(Xtest), ncol = length(predictornames))
colnames(new_preds) <- predictornames

# For every IV, build a counterfactual dataset in which that IV is set to its
# mean (or mode if near-binary), re-score the glmnet model, and store the
# counterfactual predictions in new_preds (one column per predictor).
for(i in seq_along(predictornames)){

  pname <- predictornames[i]

  # fresh copy of the observed predictors on every iteration
  newx <- as.matrix(Xtest[, predictornames])

  # if the variable has 3 or fewer unique values, set it to mode, not mean;
  # OR if it has been pre-specified as discrete. Notice this is the mode for
  # the current year, not for all time.
  # NOTE(review): "mk_ongoing_count_log" here vs "mk_ongoing_count_log2"
  # below -- confirm which name actually matches the model's predictors.
  if(length(table(newx[, pname])) <= 3 ||
     pname %in% c("religiousfreedom", "mk_ongoing_count_log")){
    iter_mu <- rep(as.numeric(names(tail(sort(table(newx[, pname])), 1))), nrow(newx))
  } else {
    iter_mu <- rep(mean(Xtest[, pname], na.rm = TRUE), nrow(newx))
  }

  # Some variables are tied together, so prespecify those here.
  # BUGFIX: the original conditions tested colnames(newx)[predictornames[i]],
  # which indexes an unnamed character vector by a string and always yields
  # NA, so these branches never executed. Test the predictor name directly.

  # partyban dummies: rows that already have one of the other partyban
  # categories set keep 0 instead of the mode (category 4, partyban.new.2==1)
  if(pname %in% c('partyban.new.0', 'partyban.new.1')){
    tied <- rowSums(newx[, c('partyban.new.0', 'partyban.new.1'), drop = FALSE]) > 0
    iter_mu[tied] <- 0
  }

  # region dummies, same logic (note: reg.afr changed to reg.na - NMB)
  reg.cols <- c('reg.na', 'reg.eap', 'reg.eur', 'reg.mna', 'reg.sca')
  if(pname %in% reg.cols){
    tied <- rowSums(newx[, reg.cols, drop = FALSE]) > 0
    iter_mu[tied] <- 0
  }

  # set all ongoing counts to 0, which also has implications for the
  # targeting variables (variable name updated - NMB)
  if(pname == "mk_ongoing_count_log2"){
    # the MK variable itself is zeroed via iter_mu below -- we could set it
    # manually here, but this is just how Jay set the code up
    iter_mu <- rep(0, nrow(newx))
    # these dependent variables can be zeroed immediately
    newx[, "widetargeting"] <- 0
    newx[, "narrowtargeting"] <- 0
  }

  # apply the counterfactual value and re-score
  newx[, pname] <- iter_mu

  # vb 9/12/2020, adding the 4-significant-digit rounding the model uses
  new_preds[, pname] <- signif(predict(model.out, newx = newx, s = "lambda.min", type = "response"), 4)
}

# New column names are the 'predictornames' values plus '_new_mean'
colnames(new_preds) <- paste0(predictornames, '_new_mean')

# Combine the counterfactual predictions with the observed predictors.
# Use Xtest here, not newx: newx still holds the LAST loop iteration's
# counterfactual values (e.g. polity2.fl.3 == 0 everywhere) -- vbauer
pred_dat <- cbind(Xtest[, predictornames], new_preds)

# Prediction shift for each IV: original prediction minus the prediction
# with that IV set to its mean/mode. (The sign is flipped later with -1 so
# that positive means higher risk.)
# Vectorized replacement for the original per-column loop: subtracting a
# length-n vector from an n x p matrix recycles it down each column.
mean_shifts <- Xtest$risk2017.2yr - new_preds
# match the original matrix, which carried no dimnames at this point
dimnames(mean_shifts) <- NULL

# vincent code, will get overwritten: save a country-labeled copy of the
# shifts with the region/partyban columns collapsed into single columns
region.names <- c("reg.na", "reg.eap", "reg.eur", "reg.mna", "reg.sca") #changed reg.afr to reg.na - NMB
party.names <- c('partyban.new.0', 'partyban.new.1')
mean_shifts.save <- mean_shifts
colnames(mean_shifts) <- predictornames
mean_shifts <- as.data.frame(mean_shifts)
mean_shifts$country_name <- results$country
# Collapse per-row: 0 when all region (resp. partyban) shifts are zero,
# otherwise the nonzero value. NOTE(review): ifelse() with a scalar test
# returns length 1, so if a row ever had more than one nonzero entry only
# the first would be kept -- assumes at most one is nonzero per row; confirm.
mean_shifts$region <- unlist(apply(mean_shifts[,region.names], 1, function(x) ifelse(sum(x) == 0, 0, x[x!=0])))
mean_shifts$partyban <- unlist(apply(mean_shifts[,party.names], 1, function(x) ifelse(sum(x) == 0, 0, x[x!=0])))
mean_shifts <- mean_shifts[, !(colnames(mean_shifts) %in% c(region.names, party.names))]
saveRDS(mean_shifts, file = paste0("meanshifts_", Sys.Date(), ".rds"))
# restore the raw matrix for the plotting code below
mean_shifts <- mean_shifts.save

# the sizes of the shifts from the original predictions to the new predictions with each IV changed one at a time are stored in the variables labeled predictornames plus '_diffs_with_mean_shifts'
colnames(mean_shifts) <- paste0(predictornames, '_diffs_with_mean_shifts')

# merge the new IVs to the full data
pred_dat <- cbind(pred_dat, mean_shifts) %>% data.frame()
pred_dat$risk2 <- Xtest$risk2017.2yr
pred_dat$country_name <- as.character(Xtest$country_name)

# create an index to plot the top 25 risk countries
#pred_dat <- pred_dat[rev(order(pred_dat$risk2)),]
#pred_dat$plot_order <- 1:nrow(pred_dat)
#pred_dat <- pred_dat[order(pred_dat$plot_order),]

# Collapse the per-dummy counterfactual predictions into one region column
# and one partyban column: each country takes the value from whichever dummy
# it has set; countries with no dummy set keep their original prediction.
region.cols <- c("reg.na", "reg.eap", "reg.eur", "reg.mna", "reg.sca") # reg.afr changed to reg.na - NMB
party.cols <- c("partyban.new.0", "partyban.new.1")

new_mean_region <- pred_dat$risk2
for (rc in region.cols) {
  on <- pred_dat[[rc]] == 1
  new_mean_region[on] <- pred_dat[[paste0(rc, "_new_mean")]][on]
}

new_mean_partyban <- pred_dat$risk2
for (pc in party.cols) {
  on <- pred_dat[[pc]] == 1
  new_mean_partyban[on] <- pred_dat[[paste0(pc, "_new_mean")]][on]
}

pred_dat$new_mean_region <- new_mean_region
pred_dat$new_mean_partyban <- new_mean_partyban

# Same consolidation for the shift sizes; countries with no dummy set get 0.
new_mean_shift_region <- rep(0, nrow(pred_dat))
for (rc in region.cols) {
  on <- pred_dat[[rc]] == 1
  new_mean_shift_region[on] <- pred_dat[[paste0(rc, "_diffs_with_mean_shifts")]][on]
}

new_mean_shift_partyban <- rep(0, nrow(pred_dat))
for (pc in party.cols) {
  on <- pred_dat[[pc]] == 1
  new_mean_shift_partyban[on] <- pred_dat[[paste0(pc, "_diffs_with_mean_shifts")]][on]
}

pred_dat$new_mean_shift_region <- new_mean_shift_region
pred_dat$new_mean_shift_partyban <- new_mean_shift_partyban

#check what's going on
#pred_dat[pred_dat$country_name=="Pakistan",colnames(pred_dat)[grepl("polity|risk|country_name", colnames(pred_dat))]]

# Variables to plot: all counterfactual-prediction columns, with the
# individual region/partyban dummies replaced by the consolidated columns.
#changed reg.afr to reg.na - NMB
mean_vars <- c(colnames(new_preds)[!(colnames(new_preds) %in% c('reg.na_new_mean','reg.eap_new_mean','reg.eur_new_mean','reg.mna_new_mean','reg.sca_new_mean',
                                                                'partyban.new.0_new_mean', 'partyban.new.1_new_mean'))], 'new_mean_region', 'new_mean_partyban')

# format the data for plotting (long format: one row per country x variable)
mean_plotdat = melt(pred_dat[, c('country_name', mean_vars)], id.vars = c('country_name'))
mean_plotdat$variable = gsub("_new_mean", "", mean_plotdat$variable)

# Same selection for the shift-size columns.
#changed reg.afr to reg.na - NMB
shift_vars <- c(colnames(mean_shifts)[!colnames(mean_shifts) %in% c('reg.na_diffs_with_mean_shifts', 'reg.eap_diffs_with_mean_shifts', 'reg.eur_diffs_with_mean_shifts', 'reg.mna_diffs_with_mean_shifts', 'reg.sca_diffs_with_mean_shifts', 
                                                                    'partyban.new.0_diffs_with_mean_shifts', 'partyban.new.1_diffs_with_mean_shifts')], 'new_mean_shift_region','new_mean_shift_partyban')

shift_size_plotdat = melt(pred_dat[, c('country_name', shift_vars)], id.vars = c('country_name'))
shift_size_plotdat$variable = gsub("_diffs_with_mean_shifts", "", shift_size_plotdat$variable)


# merge the original predictions to the predicted mean shifts
# (note: this melt duplicates the shift_size_plotdat melt above)
mean_shift_plotdat_1 = pred_dat[, c('country_name', 'risk2') ]

mean_shift_plotdat_2 = melt(pred_dat[, c('country_name', shift_vars)], id.vars = c('country_name'))

mean_shift_plotdat = dplyr::left_join(mean_shift_plotdat_1, mean_shift_plotdat_2, by="country_name")

# NOTE(review): the pattern "_new_mean" does not occur in these variable
# names (they carry the "_diffs_with_mean_shifts" suffix), so this gsub is a
# no-op -- confirm whether "_diffs_with_mean_shifts" was the intended pattern
# and whether downstream plotting code expects the long or the stripped names.
mean_shift_plotdat$variable = gsub("_new_mean", "", mean_shift_plotdat$variable, fixed = T)

colnames(mean_shift_plotdat) <- c('country_name','start','variable','end')

#pred_dat <- pred_dat[, !colnames(pred_dat) %in% c('includesnonstate')]
#pred_dat <- pred_dat[rank(pred_dat$risk2), ] #not the correct function to use, needs to be order, vbauer
pred_dat <- pred_dat[order(pred_dat$risk2), ] # sort ascending by 2-year risk

#data validation: each country should appear exactly once
stopifnot(all(table(pred_dat$country_name)==1))

write.csv(pred_dat, 'new_prediction_data_2.csv', row.names = F)

#predictornames <- predictornames[!predictornames %in% c('includesnonstate')]

Big shifters

Note, removing durable.ln_diffs_with_mean_shifts from colstouse (8/18/2020)

# Make thing that gives top X shifters and their amount or at least sign, by country

# BUGFIX(review): the intercept was already dropped when predictornames was
# built (rownames(coef(cv.glmnet))[-1] above), so the second
# predictornames[-1] taken from Chad's code discarded the FIRST REAL
# PREDICTOR from the big-shifters table. Removed the duplicate drop.

# specify the columns to use: the region and partyban dummies collapse into
# the consolidated shift columns, and variables that make no sense to shift
# are dropped
#changed reg.afr to reg.na - NMB
colstouse <- paste0(predictornames, "_diffs_with_mean_shifts")
region.shift.cols <- paste0(c("reg.na", "reg.eap", "reg.eur", "reg.mna", "reg.sca"),
                            "_diffs_with_mean_shifts")
party.shift.cols <- paste0(c("partyban.new.0", "partyban.new.1"),
                           "_diffs_with_mean_shifts")
colstouse[colstouse %in% region.shift.cols] <- "new_mean_shift_region"
colstouse[colstouse %in% party.shift.cols] <- "new_mean_shift_partyban"
# calendar-time terms: does not make sense to shift them
colstouse[colstouse %in% c("year_diffs_with_mean_shifts",
                           "year_sq_diffs_with_mean_shifts")] <- NA
colstouse <- na.omit(colstouse)
colstouse <- unique(colstouse) # region/partyban entries are now duplicated, so remove
colstouse <- c("country_name", "risk2", colstouse) # add some additional columns


# all chosen columns should be in pred_dat
stopifnot(all(colstouse %in% colnames(pred_dat)))

usefulshiftdata <- pred_dat[, colstouse]

colnames(usefulshiftdata) <- gsub("_diffs_with_mean_shifts", "", colnames(usefulshiftdata))

# sort descending by 2-year risk so the highest-risk countries come first
usefulshiftdata <- usefulshiftdata %>% arrange(-risk2)

# Create long list where first column is country name repeated P times; second column holds the sequence of top P "shifting" variables for that country, and the next column contains the amount (and sign) of the corresponding shifts. 

P=5  # number of top factors to include
rowindex=1
# NOTE: bigshifters becomes a character matrix as soon as country names are
# assigned, so the shift values are stored as text; use as.numeric() before
# doing arithmetic on the "shift" column downstream.
bigshifters=matrix(NA,nrow=P*nrow(usefulshiftdata), ncol=3)
colnames(bigshifters) = c("country_name","variable","shift")
for (j in 1:nrow(usefulshiftdata)){
  print(usefulshiftdata$country_name[j])
  subdat=-1*usefulshiftdata[j,3:ncol(usefulshiftdata)] #-1 to change direction so +1 is higher risk.
  
  #orderindex=suppressWarnings(order(abs(subdat), decreasing=TRUE))
    orderindex = order(abs(as.numeric(subdat)), decreasing=TRUE) #changed to match Chad's code - NMB
  # subdat is a one-row data frame, so this selects the P columns with the
  # largest absolute shifts and preserves their (variable) names
  keepers=subdat[orderindex][1:P]
  
  bigshifters[seq(rowindex,rowindex+P-1),"country_name"]=usefulshiftdata$country_name[j]  

  bigshifters[seq(rowindex,rowindex+P-1),"variable"] = names(keepers)  
  bigshifters[seq(rowindex,rowindex+P-1),"shift"] = as.numeric(keepers)  

  rowindex=rowindex+P  
}
## [1] "Burma/Myanmar"
## [1] "Chad"
## [1] "Sudan"
## [1] "India"
## [1] "Burkina Faso"
## [1] "Pakistan"
## [1] "Afghanistan"
## [1] "Nigeria"
## [1] "Yemen"
## [1] "Guinea"
## [1] "Niger"
## [1] "Indonesia"
## [1] "Ethiopia"
## [1] "Democratic Republic of Congo"
## [1] "Iran"
## [1] "Cambodia"
## [1] "Mali"
## [1] "Bangladesh"
## [1] "Iraq"
## [1] "Syria"
## [1] "Philippines"
## [1] "Russia"
## [1] "Somalia"
## [1] "Mozambique"
## [1] "Turkey"
## [1] "Angola"
## [1] "China"
## [1] "Laos"
## [1] "Republic of Congo"
## [1] "Tajikistan"
## [1] "South Sudan"
## [1] "Egypt"
## [1] "Thailand"
## [1] "Uganda"
## [1] "Central African Republic"
## [1] "Ivory Coast"
## [1] "Tanzania"
## [1] "Algeria"
## [1] "Eritrea"
## [1] "Burundi"
## [1] "Nepal"
## [1] "South Africa"
## [1] "Kenya"
## [1] "Cameroon"
## [1] "Sierra Leone"
## [1] "Malawi"
## [1] "Liberia"
## [1] "Sri Lanka"
## [1] "Democratic Republic of Vietnam"
## [1] "Gabon"
## [1] "Libya"
## [1] "Haiti"
## [1] "Zimbabwe"
## [1] "Colombia"
## [1] "Togo"
## [1] "Mauritania"
## [1] "Madagascar"
## [1] "Equatorial Guinea"
## [1] "Rwanda"
## [1] "Ukraine"
## [1] "Zambia"
## [1] "Morocco"
## [1] "Bahrain"
## [1] "Nicaragua"
## [1] "Uzbekistan"
## [1] "North Korea"
## [1] "Namibia"
## [1] "Senegal"
## [1] "Peru"
## [1] "Georgia"
## [1] "Jordan"
## [1] "Djibouti"
## [1] "Bosnia and Herzegovina"
## [1] "Brazil"
## [1] "Argentina"
## [1] "Lebanon"
## [1] "United States of America"
## [1] "Bhutan"
## [1] "Venezuela"
## [1] "Mexico"
## [1] "Israel"
## [1] "Botswana"
## [1] "Guinea-Bissau"
## [1] "United Arab Emirates"
## [1] "Papua New Guinea"
## [1] "El Salvador"
## [1] "Benin"
## [1] "Kazakhstan"
## [1] "Eswatini"
## [1] "Comoros"
## [1] "Guatemala"
## [1] "Azerbaijan"
## [1] "South Korea"
## [1] "Fiji"
## [1] "Canada"
## [1] "Moldova"
## [1] "Ecuador"
## [1] "Timor-Leste"
## [1] "Ghana"
## [1] "Poland"
## [1] "Bolivia"
## [1] "Romania"
## [1] "Honduras"
## [1] "Kyrgystan"
## [1] "Paraguay"
## [1] "Hungary"
## [1] "Saudi Arabia"
## [1] "United Kingdom"
## [1] "Serbia"
## [1] "Japan"
## [1] "Malaysia"
## [1] "Cyprus"
## [1] "Turkmenistan"
## [1] "Panama"
## [1] "Czech Republic"
## [1] "Maldives"
## [1] "Slovakia"
## [1] "Dominican Republic"
## [1] "Germany"
## [1] "Spain"
## [1] "France"
## [1] "Croatia"
## [1] "Chile"
## [1] "Belarus"
## [1] "Italy"
## [1] "Lesotho"
## [1] "Greece"
## [1] "The Gambia"
## [1] "Australia"
## [1] "Uruguay"
## [1] "Guyana"
## [1] "Netherlands"
## [1] "Switzerland"
## [1] "Tunisia"
## [1] "Belgium"
## [1] "Oman"
## [1] "Kuwait"
## [1] "Singapore"
## [1] "Mongolia"
## [1] "New Zealand"
## [1] "Portugal"
## [1] "Austria"
## [1] "Bulgaria"
## [1] "Qatar"
## [1] "Sweden"
## [1] "Cuba"
## [1] "Kosovo"
## [1] "Macedonia"
## [1] "Albania"
## [1] "Mauritius"
## [1] "Denmark"
## [1] "Armenia"
## [1] "Ireland"
## [1] "Solomon Islands"
## [1] "Finland"
## [1] "Norway"
## [1] "Lithuania"
## [1] "Latvia"
## [1] "Slovenia"
## [1] "Cape Verde"
## [1] "Estonia"
## [1] "Luxembourg"
## [1] "Montenegro"
## [1] "Costa Rica"
## [1] "Malta"
## [1] "Jamaica"
## [1] "Trinidad and Tobago"
## [1] "Suriname"
# Export the per-country top-P shifter table, stamped with today's date.
write.csv(bigshifters, file = paste0("bigshiftvariables_", Sys.Date(), ".csv"))

Which traits are most common

The problem with comparing differences in the average value of these variables is that variables with a larger scale (i.e. from -100 to 100) will appear to be more important than variables with a smaller scale (-1 to 1). I show the difference in the actual means and also between the standardized means (centering all variables to have mean 0 and standard deviation 1).

Variable definitions

  • mean.risk: the average value of the variable among countries coded as at risk (4%+ predicted chance or any mass killing ever )
  • mean.norisk: the average value of the variable among countries coded as not at risk
  • diff.mean: difference in the mean value between countries at risk and not at risk
  • diff.mean.abs: absolute value of the difference in the mean value between countries at risk and not at risk
  • sd.risk: average value of the centered variables for countries coded as at risk. For example, sd.risk for battledeaths.log2 is 1.96 which means that countries coded as at risk are on average 1.96 standard deviations above the mean on battle deaths (this is a lot).
  • sd.norisk: average value of the centered variables for countries coded as not at risk.
  • diff.sd: difference in the mean scaled value between countries at risk and not at risk
  • diff.sd.abs: absolute value of the difference in the scaled value between countries at risk and not at risk

Note to self

  • some of the interpretation might be easier if the variables were flipped based on direction of effect (i.e. freemovement changed to unfreemovement)

Among high-risk/low-risk countries. Setting the cut-off for “high risk” as 4% + predicted chance of mass killing in the next two years.

Among countries that have experienced mass killings. Setting the cut-off for “high risk” as any mass killing ever.

Variable importances

# Sort by descending absolute importance, drop NAs, and freeze the factor
# levels so ggplot keeps this order on the y axis.
# NOTE(review): importances is not defined in this chunk -- it must come from
# an earlier (hidden) chunk.
importances <- importances[order(-abs(importances$var_importance)),]
importances <- importances[!is.na(importances$var_importance),]
importances$varname <- factor(importances$varname, levels = rev(importances$varname))
importances$var_importance <- abs(importances$var_importance)


# NOTE(review): the trailing `+ margin` adds whatever object is bound to the
# name `margin`. Unless a theme object called `margin` is defined earlier in
# the file (not visible here), this resolves to ggplot2's margin() FUNCTION
# and will error -- confirm, then either remove it or use
# theme(plot.margin = margin(...)).
ggplot(importances, aes(x = var_importance, y = varname)) +
    geom_point() +
    labs(x = "Variable Importance", y = "Variable") +
    theme_bw() +
    margin

Plots of prediction shifts from original prediction

The value for partyban is 0 which is categories 0/1 in VDEM (this note was prev here - NB)

The value for region is North America (changed from africa - NB)

## Setting new_mean_shift_partyban to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: partyban.2: no parties banned

## Setting new_mean_shift_region to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: africa

## Setting even_civilrights to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0

## Setting popsize.log2 to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 23.55

## Setting mk_ever to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0

## Setting un_imr_sqrt to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 3.9

## Setting coup.try.5yr to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0

## Setting pol_killing_approved to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0

## Setting battledeaths.log2 to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 1.66

## Setting discrimpop to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0.02

## Setting ses_power_dist to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0.39

## Setting efindex to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0.46

## Setting ios.iccpr1 to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 1

## Setting social_power_dist to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 1

## Setting narrowtargeting to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0

## Setting minorityrule to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0

## Setting newcountry to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0

Shifts by country

## MK risk change in Burma/Myanmar
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Chad
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Sudan
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in India
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Burkina Faso
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Pakistan
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Afghanistan
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Nigeria
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Yemen
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Guinea
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Niger
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Indonesia
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Ethiopia
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Democratic Republic of Congo
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Iran
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Cambodia
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Mali
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Bangladesh
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Iraq
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Syria
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Philippines
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Russia
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Somalia
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Mozambique
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Turkey
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Angola
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in China
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Laos
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Republic of Congo
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Tajikistan
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in South Sudan
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Egypt
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Thailand
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Uganda
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Central African Republic
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Ivory Coast
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Tanzania
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Algeria
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Eritrea
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Burundi
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Nepal
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in South Africa
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Kenya
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Cameroon
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Sierra Leone
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Malawi
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Liberia
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Sri Lanka
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Democratic Republic of Vietnam
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Gabon
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Haiti
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Equatorial Guinea
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Rwanda
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in United States of America
## by shifting variables to mean/mode, 2-year horizon

## MK risk change in Venezuela
## by shifting variables to mean/mode, 2-year horizon